This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
library(SingleCellExperiment)
载入需要的程辑包:SummarizedExperiment
载入需要的程辑包:GenomicRanges
载入需要的程辑包:stats4
载入需要的程辑包:S4Vectors
载入程辑包:‘S4Vectors’
The following object is masked from ‘package:Matrix’:
expand
The following object is masked from ‘package:plotly’:
rename
The following object is masked from ‘package:tidyr’:
expand
The following objects are masked from ‘package:dplyr’:
first, rename
The following object is masked from ‘package:base’:
expand.grid
载入需要的程辑包:IRanges
载入程辑包:‘IRanges’
The following object is masked from ‘package:purrr’:
reduce
The following object is masked from ‘package:xgboost’:
slice
The following object is masked from ‘package:plotly’:
slice
The following objects are masked from ‘package:dplyr’:
collapse, desc, slice
载入需要的程辑包:GenomeInfoDb
载入需要的程辑包:DelayedArray
载入需要的程辑包:matrixStats
载入程辑包:‘matrixStats’
The following objects are masked from ‘package:Biobase’:
anyMissing, rowMedians
The following object is masked from ‘package:dplyr’:
count
载入需要的程辑包:BiocParallel
载入程辑包:‘DelayedArray’
The following objects are masked from ‘package:matrixStats’:
colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
The following object is masked from ‘package:igraph’:
simplify
The following object is masked from ‘package:purrr’:
simplify
The following object is masked from ‘package:clusterProfiler’:
simplify
The following objects are masked from ‘package:base’:
aperm, apply, rowsum
载入程辑包:‘SummarizedExperiment’
The following object is masked from ‘package:Seurat’:
Assays
library(scmap)
Creating a generic function for ‘toJSON’ from package ‘jsonlite’ in package ‘googleVis’
library(Seurat)
library(xgboost)
library(Matrix)
# Reference data set ds0: fix the RNG seed, load the project helper functions,
# read the saved object and convert it to a SingleCellExperiment.
set.seed(1)
source("tianfengRwrappers.R")
ds0 <- readRDS("ds0.rds")
ref_sce <- as.SingleCellExperiment(ds0)

# Overwrite logcounts with log2(counts + 1) and store both assays as dense
# base matrices.
logcounts(ref_sce) <- as.matrix(log2(counts(ref_sce) + 1))
counts(ref_sce) <- as.matrix(counts(ref_sce))
# normcounts(ref_sce) <- as.matrix(normcounts(ref_sce))

# Record the gene names in rowData()$feature_symbol and keep only the first
# row for every gene name.
gene_ids <- rownames(ref_sce)
rowData(ref_sce)$feature_symbol <- gene_ids
ref_sce <- ref_sce[!duplicated(gene_ids), ]

# Feature selection for scmap; the diagnostic plot is drawn as well.
ref_sce <- selectFeatures(ref_sce, suppress_plot = FALSE)
Warning: 'isSpike' is deprecated.
See help("Deprecated")
# Build the scmap-cell index for the ds0 reference. M and k are not passed, so
# scmap falls back to its defaults; the index is read back from
# metadata(ref_sce)$scmap_cell_index in the next step.
ref_sce <- indexCell(ref_sce)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
# Self-projection: search the ds0 cells against the index built from ds0
# itself, then turn the neighbour hits into cluster labels using the
# Classification1 annotation of the reference.
scmapCell_results <- scmapCell(
  ref_sce,
  list(ds0 = metadata(ref_sce)$scmap_cell_index)
)
scmapCell_clusters <- scmapCell2Cluster(
  scmapCell_results,
  list(as.character(colData(ref_sce)$Classification1))
)

# One-column data frame of predicted labels; row names come from colnames(ds0).
temp <- data.frame(
  scmapCell_clusters$scmap_cluster_labs[, "ds0"],
  row.names = colnames(ds0)
)
Idents(ds0) <- temp
ggsave("./scmap/scmap_ds0tods0.svg", device = svg, width = 6, height = 4,
       plot = umapplot(ds0))

# Cells per predicted label. The label column is named explicitly so that the
# plotly formulas resolve to columns of this table; table(temp) would name the
# column after temp's auto-generated column name, leaving `~temp` to resolve
# to the whole `temp` data frame instead of the per-label rows.
label_counts <- as.data.frame(table(label = temp[, 1]))
fig <- plot_ly(
  label_counts,
  labels = ~label, values = ~Freq, type = "pie",
  textposition = "inside",
  textinfo = "label+percent+value",
  insidetextfont = list(color = "#000000"),
  hoverinfo = "text",
  text = ~paste0("cell numbers: ", Freq),
  marker = list(
    colors = colors_list,
    line = list(color = "#FFFFFF", width = 0)
  ),
  showlegend = FALSE
) %>%
  layout(
    title = "scmap_ds0tods0",
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    font = list(family = "Arial", size = 25, color = "black")
  )
fig

# Agreement between predicted and annotated labels.
mclust::adjustedRandIndex(temp[, 1], ds0$Classification1)
[1] 0.8925608
# Cross-tabulate annotated labels (rows, "true") against scmap labels
# (columns, "pre") and show each row as proportions.
conmat <- table(
  as.character(ds0$Classification1),
  temp[, 1],
  dnn = c("true", "pre")
)
conmat_prop <- prop.table(conmat, margin = 1)
conmat_prop
pre
true Fibroblast Fibromyocyte Pericyte SMC unassigned Unknown
Fibroblast 0.9745106964 0.0009103323 0.0000000000 0.0000000000 0.0241238052 0.0004551661
Fibromyocyte 0.0086419753 0.7777777778 0.0012345679 0.0086419753 0.2037037037 0.0000000000
Pericyte 0.0016090105 0.0048270314 0.9452936444 0.0016090105 0.0466613033 0.0000000000
SMC 0.0000000000 0.0173010381 0.0057670127 0.8223760092 0.1545559400 0.0000000000
Unknown 0.0833333333 0.0000000000 0.0000000000 0.0000000000 0.1715686275 0.7450980392
# Bubble plot of the scmap confusion proportions.
confuse_bubblemat(conmat_prop, rownames(conmat_prop), colnames(conmat_prop), "ds0_scmap")

# Replace the annotated cell-type names with 0-based numeric class codes,
# which are used as the XGBoost labels below.
Idents(ds0) <- ds0$Classification1
ds0 <- RenameIdents(ds0, "Fibroblast" = 0, "SMC" = 1, "Fibromyocyte" = 2,
                    "Pericyte" = 3, "Unknown" = 4)
umapplot(ds0)
ds0_data <- get_data_table(ds0, highvar = FALSE, type = "data")
ds0_label <- as.numeric(as.character(Idents(ds0)))

# Hold out 30% of the columns of ds0_data (one column per label in ds0_label)
# as the test set; the rest is used for training.
set.seed(7)
index <- sample(
  seq_len(ncol(ds0_data)),
  ceiling(0.3 * ncol(ds0_data)),
  replace = FALSE
)
colnames(ds0_data) <- NULL

# XGBoost wants observations in rows, hence the transpose.
ds0_train_data <- list(
  data = t(as(ds0_data[, -index], "dgCMatrix")),
  label = ds0_label[-index]
)
ds0_test_data <- list(
  data = t(as(ds0_data[, index], "dgCMatrix")),
  label = ds0_label[index]
)
ds0_train <- xgb.DMatrix(data = ds0_train_data$data, label = ds0_train_data$label)
ds0_test <- xgb.DMatrix(data = ds0_test_data$data, label = ds0_test_data$label)
watchlist <- list(train = ds0_train, eval = ds0_test)

# Multi-class model: one class per identity level of ds0.
xgb_param <- list(
  eta = 0.2, max_depth = 6,
  subsample = 0.6, num_class = nlevels(Idents(ds0)),
  objective = "multi:softmax", eval_metric = "mlogloss"
)
bst_model <- xgb.train(xgb_param, ds0_train, nrounds = 100, watchlist, verbose = 0)

# Predicted class codes on the held-out cells and the row-normalised
# confusion table (rows = true class, columns = predicted class).
predict_ds0_test <- round(predict(bst_model, newdata = ds0_test))
ds0_confuse_matrix_test <- table(ds0_test_data$label, predict_ds0_test,
                                 dnn = c("true", "pre"))
ds0_confuse_matrix_test_prop <- prop.table(ds0_confuse_matrix_test, 1)
ds0_confuse_matrix_test_prop
pre
true 0 1 2 3 4
0 0.989345510 0.000000000 0.004566210 0.000000000 0.006088280
1 0.000000000 0.923344948 0.052264808 0.024390244 0.000000000
2 0.025316456 0.042194093 0.919831224 0.012658228 0.000000000
3 0.000000000 0.002849003 0.005698006 0.991452991 0.000000000
4 0.076923077 0.000000000 0.015384615 0.000000000 0.907692308
# Cell-type names in the order of the numeric class codes 0..4 assigned by
# RenameIdents() for ds0.
ds0_class_names <- c("Fibroblast", "SMC", "Fibromyocyte", "Pericyte", "Unknown")
confuse_bubblemat(ds0_confuse_matrix_test_prop, ds0_class_names, ds0_class_names, "ds0_pretrain")

# Classifier performance on the held-out cells. The function is qualified with
# mclust:: like the other ARI calls in this notebook, so it does not depend on
# mclust being attached.
mclust::adjustedRandIndex(ds0_test_data$label, predict_ds0_test)
[1] 0.9316151
# Data set ds2: reset the seed, read the saved object and convert it to a
# SingleCellExperiment.
set.seed(1)
ds2 <- readRDS("ds2.rds")
query_sce <- as.SingleCellExperiment(ds2)

# logcounts become log2(counts + 1); both assays are stored as dense matrices.
logcounts(query_sce) <- as.matrix(log2(counts(query_sce) + 1))
counts(query_sce) <- as.matrix(counts(query_sce))
# normcounts(query_sce) <- as.matrix(normcounts(query_sce))

# Gene names go into rowData()$feature_symbol; repeated gene names are dropped
# after their first occurrence.
gene_ids <- rownames(query_sce)
rowData(query_sce)$feature_symbol <- gene_ids
query_sce <- query_sce[!duplicated(gene_ids), ]

# Feature selection for scmap, with the diagnostic plot.
query_sce <- selectFeatures(query_sce, suppress_plot = FALSE)
Warning: 'isSpike' is deprecated.
See help("Deprecated")
# Build the scmap-cell index for ds2 with scmap's default M and k; the next
# step searches ds2 against this index.
query_sce <- indexCell(query_sce)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
# Self-projection: search the ds2 cells against the index built from ds2
# itself and derive cluster labels from ds2's Classification1 annotation.
scmapCell_results <- scmapCell(
  query_sce,
  list(ds2 = metadata(query_sce)$scmap_cell_index)
)
scmapCell_clusters <- scmapCell2Cluster(
  scmapCell_results,
  list(as.character(colData(query_sce)$Classification1))
)

# One-column data frame of predicted labels; row names come from colnames(ds2).
temp <- data.frame(
  scmapCell_clusters$scmap_cluster_labs[, "ds2"],
  row.names = colnames(ds2)
)
Idents(ds2) <- temp
ggsave("./scmap/scmap_ds2tods2.svg", device = svg, width = 6, height = 4,
       plot = umapplot(ds2))

# Cells per predicted label. The label column is named explicitly so that the
# plotly formulas resolve to columns of this table; table(temp) would name the
# column after temp's auto-generated column name, leaving `~temp` to resolve
# to the whole `temp` data frame instead of the per-label rows.
label_counts <- as.data.frame(table(label = temp[, 1]))
fig <- plot_ly(
  label_counts,
  labels = ~label, values = ~Freq, type = "pie",
  textposition = "inside",
  textinfo = "label+percent+value",
  insidetextfont = list(color = "#000000"),
  hoverinfo = "text",
  text = ~paste0("cell numbers: ", Freq),
  marker = list(
    colors = colors_list,
    line = list(color = "#FFFFFF", width = 0)
  ),
  showlegend = FALSE
) %>%
  layout(
    title = "scmap_ds2tods2",
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    font = list(family = "Arial", size = 25, color = "black")
  )
fig

# Agreement between predicted and annotated labels.
mclust::adjustedRandIndex(temp[, 1], ds2$Classification1)
[1] 0.776329
# Annotated labels (rows, "true") against scmap labels (columns, "pre") for
# ds2, printed as row-wise proportions.
conmat <- table(
  as.character(ds2$Classification1),
  temp[, 1],
  dnn = c("true", "pre")
)
conmat_prop <- prop.table(conmat, margin = 1)
conmat_prop
pre
true Fibroblast Fibromyocyte Pericyte SMC1 SMC2 unassigned
Fibroblast 0.9807846277 0.0032025620 0.0000000000 0.0000000000 0.0000000000 0.0160128102
Fibromyocyte 0.0031222123 0.7814451383 0.0049063336 0.0107047279 0.0026761820 0.1971454059
Pericyte 0.0000000000 0.0101073910 0.8180669615 0.0063171194 0.0000000000 0.1655085281
SMC1 0.0000000000 0.0058292043 0.0040804430 0.8837073739 0.0040804430 0.1023025357
SMC2 0.0009689922 0.0000000000 0.0009689922 0.0096899225 0.8226744186 0.1656976744
# Replace the annotated cell-type names of ds2 with 0-based numeric class
# codes, which are used as the XGBoost labels below.
Idents(ds2) <- ds2$Classification1
ds2 <- RenameIdents(ds2, "SMC1" = 0, "Fibromyocyte" = 1, "Pericyte" = 2,
                    "Fibroblast" = 3, "SMC2" = 4)
ds2_data <- get_data_table(ds2, highvar = FALSE, type = "data")
ds2_label <- as.numeric(as.character(Idents(ds2)))

# Hold out 30% of the columns of ds2_data (one column per label in ds2_label)
# as the test set; the rest is used for training.
set.seed(7)
index <- sample(
  seq_len(ncol(ds2_data)),
  ceiling(0.3 * ncol(ds2_data)),
  replace = FALSE
)
colnames(ds2_data) <- NULL

# XGBoost wants observations in rows, hence the transpose.
ds2_train_data <- list(
  data = t(as(ds2_data[, -index], "dgCMatrix")),
  label = ds2_label[-index]
)
ds2_test_data <- list(
  data = t(as(ds2_data[, index], "dgCMatrix")),
  label = ds2_label[index]
)
ds2_train <- xgb.DMatrix(data = ds2_train_data$data, label = ds2_train_data$label)
ds2_test <- xgb.DMatrix(data = ds2_test_data$data, label = ds2_test_data$label)
watchlist <- list(train = ds2_train, eval = ds2_test)

# Multi-class model: one class per identity level of ds2.
xgb_param <- list(
  eta = 0.2, max_depth = 6,
  subsample = 0.6, num_class = nlevels(Idents(ds2)),
  objective = "multi:softmax", eval_metric = "mlogloss"
)
bst_model <- xgb.train(xgb_param, ds2_train, nrounds = 100, watchlist, verbose = 0)

# Predict the held-out cells and build the row-normalised confusion table
# (rows = true class, columns = predicted class). These steps mirror the ds0
# section; without them the table printed below is never defined.
predict_ds2_test <- round(predict(bst_model, newdata = ds2_test))
ds2_confuse_matrix_test <- table(ds2_test_data$label, predict_ds2_test,
                                 dnn = c("true", "pre"))
ds2_confuse_matrix_test_prop <- prop.table(ds2_confuse_matrix_test, 1)
ds2_confuse_matrix_test_prop
pre
true 0 1 2 3 4
0 0.964843750 0.017578125 0.011718750 0.000000000 0.005859375
1 0.021865889 0.963556851 0.005830904 0.002915452 0.005830904
2 0.037946429 0.033482143 0.928571429 0.000000000 0.000000000
3 0.000000000 0.012886598 0.000000000 0.987113402 0.000000000
4 0.012658228 0.025316456 0.000000000 0.000000000 0.962025316
# Cross-projection: search the ds2 cells (query_sce) against the ds0 index
# (ref_sce) and label them with ds0's Classification1 annotation.
scmapCell_results <- scmapCell(
  query_sce,
  list(ds0 = metadata(ref_sce)$scmap_cell_index)
)
scmapCell_clusters <- scmapCell2Cluster(
  scmapCell_results,
  list(as.character(colData(ref_sce)$Classification1))
)

# One-column data frame of predicted labels; row names come from colnames(ds2).
temp <- data.frame(
  scmapCell_clusters$scmap_cluster_labs[, "ds0"],
  row.names = colnames(ds2)
)
Idents(ds2) <- temp
# ggsave("./scmap/scmap_ds2tods0.svg", device = svg, width = 6, height = 4, plot = umapplot(ds2))

# Cells per predicted label. The label column is named explicitly so that the
# plotly formulas resolve to columns of this table; table(temp) would name the
# column after temp's auto-generated column name, leaving `~temp` to resolve
# to the whole `temp` data frame instead of the per-label rows.
label_counts <- as.data.frame(table(label = temp[, 1]))
fig <- plot_ly(
  label_counts,
  labels = ~label, values = ~Freq, type = "pie",
  textposition = "inside",
  textinfo = "label+percent+value",
  insidetextfont = list(color = "#000000"),
  hoverinfo = "text",
  text = ~paste0("cell numbers: ", Freq),
  marker = list(
    colors = colors_list,
    line = list(color = "#FFFFFF", width = 0)
  ),
  showlegend = FALSE
) %>%
  layout(
    title = "scmap_ds2tods0",
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    font = list(family = "Arial", size = 25, color = "black")
  )
fig
# Compare marker expression between ds0 cells annotated as Fibromyocyte and
# ds2 cells that scmap labelled Fibromyocyte.
#
# ds0's identities were replaced by numeric class codes for the XGBoost
# section, so restore the annotated cell-type names before selecting by name.
Idents(ds0) <- ds0$Classification1
ds0FbM <- subset(ds0, idents = "Fibromyocyte")
# ds2's identities are the scmap labels assigned from the ds0 reference.
ds2FbM <- subset(ds2, idents = "Fibromyocyte")
ds0data <- get_data_table(ds0FbM, type = "data")
ds2data <- get_data_table(ds2FbM, type = "data")

# genes_to_show <- c("IGFBP2","MGP","MYH11","DCN","TNFRSF11B")
genes_to_show <- c("DCN", "LUM", "MMP2", "ACTA2", "TNFRSF11B", "FBLN1")

# Stack the per-gene tables of both samples in one call instead of growing the
# data frame inside a loop. func1 comes from the sourced helpers; it is
# presumably returning one data frame (expr / gene / sample) per gene.
merge_expr <- do.call(
  rbind,
  c(
    list(data.frame()),
    lapply(genes_to_show, func1, "ds0", ds0data),
    lapply(genes_to_show, func1, "ds2", ds2data)
  )
)
rownames(merge_expr) <- NULL

# Per sample and gene summary of expr (includes the `ci` column used below).
Data_summary <- Rmisc::summarySE(merge_expr, measurevar = "expr",
                                 groupvars = c("sample", "gene"))
head(Data_summary)

ggobj <- ggplot(merge_expr, aes(x = gene, y = expr, fill = sample)) +
  geom_split_violin(trim = FALSE, color = "white", scale = "area") +
  # Mark the summary value of each group.
  geom_point(
    data = Data_summary, aes(x = gene, y = expr), pch = 19,
    position = position_dodge(0.2), size = 1
  ) +
  # Error bars, dodged by the same amount as the points so they line up.
  geom_errorbar(
    data = Data_summary, aes(ymin = expr - ci, ymax = expr + ci),
    width = 0.05,
    position = position_dodge(0.2),
    color = "black",
    alpha = 0.7,
    size = 0.5
  ) +
  scale_fill_manual(values = c("#b1d6fb", "#fd9999")) +
  labs(y = ("Log2 expression"), x = NULL, title = "Split violin") +
  theme_classic() +
  mytheme +
  # Per-gene Wilcoxon test between the two samples, printed at the top.
  stat_compare_means(
    aes(group = sample),
    label = "p.format",
    method = "wilcox.test",
    label.y = max(merge_expr$expr),
    hide.ns = FALSE
  )
ggobj
ggsave("./scmap/scmapsupds0tods2.svg", device = svg, plot = ggobj, height = 3, width = 5)
# Swap roles: ds2 is now the reference. Re-read it from disk and convert it to
# a SingleCellExperiment.
ds2 <- readRDS("ds2.rds")
ref_sce <- as.SingleCellExperiment(ds2)

# logcounts become log2(counts + 1); both assays are stored as dense matrices.
logcounts(ref_sce) <- as.matrix(log2(counts(ref_sce) + 1))
counts(ref_sce) <- as.matrix(counts(ref_sce))
# normcounts(ref_sce) <- as.matrix(normcounts(ref_sce))

# Gene names go into rowData()$feature_symbol; repeated gene names are dropped
# after their first occurrence.
gene_ids <- rownames(ref_sce)
rowData(ref_sce)$feature_symbol <- gene_ids
ref_sce <- ref_sce[!duplicated(gene_ids), ]

# Feature selection (with plot), then the scmap-cell index of the reference.
ref_sce <- selectFeatures(ref_sce, suppress_plot = FALSE)
ref_sce <- indexCell(ref_sce)
# ds0 is now the query. Re-read it from disk and convert it to a
# SingleCellExperiment.
ds0 <- readRDS("ds0.rds")
query_sce <- as.SingleCellExperiment(ds0)

# logcounts become log2(counts + 1); both assays are stored as dense matrices.
logcounts(query_sce) <- as.matrix(log2(counts(query_sce) + 1))
counts(query_sce) <- as.matrix(counts(query_sce))
# normcounts(query_sce) <- as.matrix(normcounts(query_sce))

# Gene names go into rowData()$feature_symbol; repeated gene names are dropped
# after their first occurrence.
gene_ids <- rownames(query_sce)
rowData(query_sce)$feature_symbol <- gene_ids
query_sce <- query_sce[!duplicated(gene_ids), ]

# Same feature selection and indexing steps as for the reference.
query_sce <- selectFeatures(query_sce, suppress_plot = FALSE)
query_sce <- indexCell(query_sce)
# Cross-projection: search the ds0 cells (query_sce) against the ds2 index
# (ref_sce) and label them with ds2's Classification1 annotation.
scmapCell_results <- scmapCell(
  query_sce,
  list(ds2 = metadata(ref_sce)$scmap_cell_index)
)
scmapCell_clusters <- scmapCell2Cluster(
  scmapCell_results,
  list(as.character(colData(ref_sce)$Classification1))
)

# One-column data frame of predicted labels; row names come from colnames(ds0).
temp <- data.frame(
  scmapCell_clusters$scmap_cluster_labs[, "ds2"],
  row.names = colnames(ds0)
)
Idents(ds0) <- temp
ggsave("./scmap/scmap_ds0tods2.svg", device = svg, width = 6, height = 4,
       plot = umapplot(ds0))

# Cells per predicted label. The label column is named explicitly so that the
# plotly formulas resolve to columns of this table; table(temp) would name the
# column after temp's auto-generated column name, leaving `~temp` to resolve
# to the whole `temp` data frame instead of the per-label rows.
label_counts <- as.data.frame(table(label = temp[, 1]))
fig <- plot_ly(
  label_counts,
  labels = ~label, values = ~Freq, type = "pie",
  textposition = "inside",
  textinfo = "label+percent+value",
  insidetextfont = list(color = "#000000"),
  hoverinfo = "text",
  text = ~paste0("cell numbers: ", Freq),
  marker = list(
    colors = colors_list,
    line = list(color = "#FFFFFF", width = 0)
  ),
  showlegend = FALSE
) %>%
  layout(
    title = "scmap_ds0tods2",
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    font = list(family = "Arial", size = 25, color = "black")
  )
fig
# Compare marker expression between ds0 cells that scmap labelled Fibromyocyte
# (identities assigned from the ds2 reference) and the Fibromyocyte cells of
# ds2.
ds0FbM <- subset(ds0, idents = "Fibromyocyte")
# NOTE(review): ds2 was re-read from disk before this point, so this relies on
# the identities stored in ds2.rds containing "Fibromyocyte" - confirm.
ds2FbM <- subset(ds2, idents = "Fibromyocyte")
ds0data <- get_data_table(ds0FbM, type = "data")
ds2data <- get_data_table(ds2FbM, type = "data")

# genes_to_show <- c("IGFBP2","MGP","MYH11","DCN","TNFRSF11B")
genes_to_show <- c("DCN", "LUM", "MMP2", "ACTA2", "TNFRSF11B", "FBLN1")

# Stack the per-gene tables of both samples in one call instead of growing the
# data frame inside a loop. func1 comes from the sourced helpers; it is
# presumably returning one data frame (expr / gene / sample) per gene.
merge_expr <- do.call(
  rbind,
  c(
    list(data.frame()),
    lapply(genes_to_show, func1, "ds0", ds0data),
    lapply(genes_to_show, func1, "ds2", ds2data)
  )
)
rownames(merge_expr) <- NULL

# Per sample and gene summary of expr (includes the `ci` column used below).
Data_summary <- Rmisc::summarySE(merge_expr, measurevar = "expr",
                                 groupvars = c("sample", "gene"))
head(Data_summary)

ggobj <- ggplot(merge_expr, aes(x = gene, y = expr, fill = sample)) +
  geom_split_violin(trim = FALSE, color = "white", scale = "area") +
  # Mark the summary value of each group.
  geom_point(
    data = Data_summary, aes(x = gene, y = expr), pch = 19,
    position = position_dodge(0.2), size = 1
  ) +
  # Error bars, dodged by the same amount as the points so they line up.
  geom_errorbar(
    data = Data_summary, aes(ymin = expr - ci, ymax = expr + ci),
    width = 0.05,
    position = position_dodge(0.2),
    color = "black",
    alpha = 0.7,
    size = 0.5
  ) +
  scale_fill_manual(values = c("#b1d6fb", "#fd9999")) +
  labs(y = ("Log2 expression"), x = NULL, title = "Split violin") +
  theme_classic() +
  mytheme +
  # Per-gene Wilcoxon test between the two samples, printed at the top.
  stat_compare_means(
    aes(group = sample),
    label = "p.format",
    method = "wilcox.test",
    label.y = max(merge_expr$expr),
    hide.ns = FALSE
  )
ggobj
ggsave("./scmap/scmapsupds2tods0.svg", device = svg, plot = ggobj, height = 6, width = 10)
# Data set ds1: read the saved object and convert it to a SingleCellExperiment.
ds1 <- readRDS("ds1.rds")
ref_sce <- as.SingleCellExperiment(ds1)

# logcounts become log2(counts + 1); both assays are stored as dense matrices.
logcounts(ref_sce) <- as.matrix(log2(counts(ref_sce) + 1))
counts(ref_sce) <- as.matrix(counts(ref_sce))
# normcounts(ref_sce) <- as.matrix(normcounts(ref_sce))

# Gene names go into rowData()$feature_symbol; repeated gene names are dropped
# after their first occurrence.
gene_ids <- rownames(ref_sce)
rowData(ref_sce)$feature_symbol <- gene_ids
ref_sce <- ref_sce[!duplicated(gene_ids), ]

# Feature selection for scmap, with the diagnostic plot.
ref_sce <- selectFeatures(ref_sce, suppress_plot = FALSE)
Warning: 'isSpike' is deprecated.
See help("Deprecated")
# Build the scmap-cell index for ds1 with scmap's default M and k.
ref_sce <- indexCell(ref_sce)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
# Self-projection: search the ds1 cells against the index built from ds1
# itself; the result is stored under the name "ds1".
scmapCell_results <- scmapCell(ref_sce, list(ds1 = metadata(ref_sce)$scmap_cell_index))
Warning: stack imbalance in '<-', 2 then 1
# Turn the ds1 self-projection hits into cluster labels using ds1's
# Classification1 annotation.
scmapCell_clusters <- scmapCell2Cluster(
  scmapCell_results,
  list(as.character(colData(ref_sce)$Classification1))
)

# One-column data frame of predicted labels; row names come from colnames(ds1).
temp <- data.frame(
  scmapCell_clusters$scmap_cluster_labs[, "ds1"],
  row.names = colnames(ds1)
)
Idents(ds1) <- temp
ggsave("./scmap/scmap_ds1tods1.svg", device = svg, width = 6, height = 4,
       plot = umapplot(ds1))

# Cells per predicted label. The label column is named explicitly so that the
# plotly formulas resolve to columns of this table; table(temp) would name the
# column after temp's auto-generated column name, leaving `~temp` to resolve
# to the whole `temp` data frame instead of the per-label rows.
label_counts <- as.data.frame(table(label = temp[, 1]))
fig <- plot_ly(
  label_counts,
  labels = ~label, values = ~Freq, type = "pie",
  textposition = "inside",
  textinfo = "label+percent+value",
  insidetextfont = list(color = "#000000"),
  hoverinfo = "text",
  text = ~paste0("cell numbers: ", Freq),
  marker = list(
    colors = colors_list,
    line = list(color = "#FFFFFF", width = 0)
  ),
  showlegend = FALSE
) %>%
  layout(
    title = "scmap_ds1tods1",
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    font = list(family = "Arial", size = 25, color = "black")
  )
fig

# Agreement between predicted and annotated labels.
mclust::adjustedRandIndex(temp[, 1], ds1$Classification1)
[1] 0.7935582
# Annotated labels (rows, "true") against scmap labels (columns, "pre") for
# ds1, printed as row-wise proportions.
conmat <- table(
  as.character(ds1$Classification1),
  temp[, 1],
  dnn = c("true", "pre")
)
conmat_prop <- prop.table(conmat, margin = 1)
conmat_prop
pre
true Fibromyocyte SMC1 SMC2 unassigned Unknown
Fibromyocyte 0.745973646 0.031478770 0.003660322 0.218887262 0.000000000
SMC1 0.002773376 0.939778130 0.000792393 0.056656101 0.000000000
SMC2 0.007518797 0.005012531 0.822055138 0.165413534 0.000000000
Unknown 0.000000000 0.000000000 0.000000000 0.115384615 0.884615385
# Bubble plot of the scmap confusion proportions for ds1.
confuse_bubblemat(conmat_prop, rownames(conmat_prop), colnames(conmat_prop), "ds1_scmap")

# Replace the annotated cell-type names of ds1 with 0-based numeric class
# codes, which are used as the XGBoost labels below.
Idents(ds1) <- ds1$Classification1
ds1 <- RenameIdents(ds1, "Unknown" = 0, "SMC1" = 1, "Fibromyocyte" = 2, "SMC2" = 3)
ds1_data <- get_data_table(ds1, highvar = FALSE, type = "data")
ds1_label <- as.numeric(as.character(Idents(ds1)))

# Hold out 30% of the columns of ds1_data (one column per label in ds1_label)
# as the test set; the rest is used for training.
set.seed(7)
index <- sample(
  seq_len(ncol(ds1_data)),
  ceiling(0.3 * ncol(ds1_data)),
  replace = FALSE
)
colnames(ds1_data) <- NULL

# XGBoost wants observations in rows, hence the transpose.
ds1_train_data <- list(
  data = t(as(ds1_data[, -index], "dgCMatrix")),
  label = ds1_label[-index]
)
ds1_test_data <- list(
  data = t(as(ds1_data[, index], "dgCMatrix")),
  label = ds1_label[index]
)
ds1_train <- xgb.DMatrix(data = ds1_train_data$data, label = ds1_train_data$label)
ds1_test <- xgb.DMatrix(data = ds1_test_data$data, label = ds1_test_data$label)
watchlist <- list(train = ds1_train, eval = ds1_test)

# Multi-class model: one class per identity level of ds1.
xgb_param <- list(
  eta = 0.2, max_depth = 6,
  subsample = 0.6, num_class = nlevels(Idents(ds1)),
  objective = "multi:softmax", eval_metric = "mlogloss"
)
bst_model <- xgb.train(xgb_param, ds1_train, nrounds = 100, watchlist, verbose = 0)

# Predict the held-out cells first: the ARI below needs predict_ds1_test, which
# is otherwise not defined at this point of the notebook.
predict_ds1_test <- round(predict(bst_model, newdata = ds1_test))
mclust::adjustedRandIndex(ds1_test_data$label, predict_ds1_test) # ARI
[1] 0.8385574
# Repeated hold-out evaluation of the XGBoost classifier on ds0: ten random
# 70/30 splits, one ARI per split, stored in the list ds0_ARI.
Idents(ds0) <- ds0$Classification1
ds0 <- RenameIdents(ds0, "Fibroblast" = 0, "SMC" = 1, "Fibromyocyte" = 2,
                    "Pericyte" = 3, "Unknown" = 4)
ds0_data <- get_data_table(ds0, highvar = TRUE, type = "data")
ds0_label <- as.numeric(as.character(Idents(ds0)))

# Work that does not change between repeats is done once, up front.
colnames(ds0_data) <- NULL
xgb_param <- list(
  eta = 0.2, max_depth = 6,
  subsample = 0.6, num_class = nlevels(Idents(ds0)),
  objective = "multi:softmax", eval_metric = "mlogloss"
)

ds0_ARI <- vector("list", 10)
for (i in seq_len(10)) {
  # A different, reproducible split for every repeat.
  set.seed(17 * i)
  index <- sample(
    seq_len(ncol(ds0_data)),
    ceiling(0.3 * ncol(ds0_data)),
    replace = FALSE
  )
  ds0_train_data <- list(
    data = t(as(ds0_data[, -index], "dgCMatrix")),
    label = ds0_label[-index]
  )
  ds0_test_data <- list(
    data = t(as(ds0_data[, index], "dgCMatrix")),
    label = ds0_label[index]
  )
  ds0_train <- xgb.DMatrix(data = ds0_train_data$data, label = ds0_train_data$label)
  ds0_test <- xgb.DMatrix(data = ds0_test_data$data, label = ds0_test_data$label)
  watchlist <- list(train = ds0_train, eval = ds0_test)
  bst_model <- xgb.train(xgb_param, ds0_train, nrounds = 100, watchlist, verbose = 0)
  predict_ds0_test <- round(predict(bst_model, newdata = ds0_test))
  # Qualified with mclust:: like the scmap ARI calls in this notebook.
  ds0_ARI[[i]] <- mclust::adjustedRandIndex(ds0_test_data$label, predict_ds0_test)
}
# Repeated hold-out evaluation of the XGBoost classifier on ds1: ten random
# 70/30 splits, one ARI per split, stored in the list ds1_ARI.
Idents(ds1) <- ds1$Classification1
ds1 <- RenameIdents(ds1, "Unknown" = 0, "SMC1" = 1, "Fibromyocyte" = 2, "SMC2" = 3)
ds1_data <- get_data_table(ds1, highvar = TRUE, type = "data")
ds1_label <- as.numeric(as.character(Idents(ds1)))

# Work that does not change between repeats is done once, up front.
colnames(ds1_data) <- NULL
xgb_param <- list(
  eta = 0.2, max_depth = 6,
  subsample = 0.6, num_class = nlevels(Idents(ds1)),
  objective = "multi:softmax", eval_metric = "mlogloss"
)

ds1_ARI <- vector("list", 10)
for (i in seq_len(10)) {
  # A different, reproducible split for every repeat.
  set.seed(17 * i)
  index <- sample(
    seq_len(ncol(ds1_data)),
    ceiling(0.3 * ncol(ds1_data)),
    replace = FALSE
  )
  ds1_train_data <- list(
    data = t(as(ds1_data[, -index], "dgCMatrix")),
    label = ds1_label[-index]
  )
  ds1_test_data <- list(
    data = t(as(ds1_data[, index], "dgCMatrix")),
    label = ds1_label[index]
  )
  ds1_train <- xgb.DMatrix(data = ds1_train_data$data, label = ds1_train_data$label)
  ds1_test <- xgb.DMatrix(data = ds1_test_data$data, label = ds1_test_data$label)
  watchlist <- list(train = ds1_train, eval = ds1_test)
  bst_model <- xgb.train(xgb_param, ds1_train, nrounds = 100, watchlist, verbose = 0)
  predict_ds1_test <- round(predict(bst_model, newdata = ds1_test))
  # Qualified with mclust:: like the scmap ARI calls in this notebook.
  ds1_ARI[[i]] <- mclust::adjustedRandIndex(ds1_test_data$label, predict_ds1_test)
}
# Repeated hold-out evaluation of the XGBoost classifier on ds2: ten random
# 70/30 splits, one ARI per split, stored in the list ds2_ARI.
Idents(ds2) <- ds2$Classification1
ds2 <- RenameIdents(ds2, "SMC1" = 0, "Fibromyocyte" = 1, "Pericyte" = 2,
                    "Fibroblast" = 3, "SMC2" = 4)
ds2_data <- get_data_table(ds2, highvar = TRUE, type = "data")
ds2_label <- as.numeric(as.character(Idents(ds2)))

# Work that does not change between repeats is done once, up front.
colnames(ds2_data) <- NULL
xgb_param <- list(
  eta = 0.2, max_depth = 6,
  subsample = 0.6, num_class = nlevels(Idents(ds2)),
  objective = "multi:softmax", eval_metric = "mlogloss"
)

ds2_ARI <- vector("list", 10)
for (i in seq_len(10)) {
  # A different, reproducible split for every repeat.
  set.seed(17 * i)
  index <- sample(
    seq_len(ncol(ds2_data)),
    ceiling(0.3 * ncol(ds2_data)),
    replace = FALSE
  )
  ds2_train_data <- list(
    data = t(as(ds2_data[, -index], "dgCMatrix")),
    label = ds2_label[-index]
  )
  ds2_test_data <- list(
    data = t(as(ds2_data[, index], "dgCMatrix")),
    label = ds2_label[index]
  )
  ds2_train <- xgb.DMatrix(data = ds2_train_data$data, label = ds2_train_data$label)
  ds2_test <- xgb.DMatrix(data = ds2_test_data$data, label = ds2_test_data$label)
  watchlist <- list(train = ds2_train, eval = ds2_test)
  bst_model <- xgb.train(xgb_param, ds2_train, nrounds = 100, watchlist, verbose = 0)
  predict_ds2_test <- round(predict(bst_model, newdata = ds2_test))
  # Qualified with mclust:: like the scmap ARI calls in this notebook.
  ds2_ARI[[i]] <- mclust::adjustedRandIndex(ds2_test_data$label, predict_ds2_test)
}
# Rebuild the scmap input for ds0 ahead of the repeated self-projection runs.
ref_sce <- as.SingleCellExperiment(ds0)

# logcounts become log2(counts + 1); both assays are stored as dense matrices.
logcounts(ref_sce) <- as.matrix(log2(counts(ref_sce) + 1))
counts(ref_sce) <- as.matrix(counts(ref_sce))

# Gene names go into rowData()$feature_symbol; repeated gene names are dropped
# after their first occurrence.
gene_ids <- rownames(ref_sce)
rowData(ref_sce)$feature_symbol <- gene_ids
ref_sce <- ref_sce[!duplicated(gene_ids), ]

ref_sce <- selectFeatures(ref_sce, suppress_plot = FALSE)
Warning: 'isSpike' is deprecated.
See help("Deprecated")
# Ten seeded repeats of the ds0 self-projection: rebuild the index, project
# ds0 onto it and record the ARI against the annotated labels.
scmapARI_ds0 <- vector("list", 10)
for (i in seq_len(10)) {
  set.seed(17 * i)
  ref_sce <- indexCell(ref_sce)
  scmapCell_results <- scmapCell(
    ref_sce,
    list(ds0 = metadata(ref_sce)$scmap_cell_index)
  )
  scmapCell_clusters <- scmapCell2Cluster(
    scmapCell_results,
    list(as.character(colData(ref_sce)$Classification1))
  )
  temp <- data.frame(
    scmapCell_clusters$scmap_cluster_labs[, "ds0"],
    row.names = colnames(ds0)
  )
  scmapARI_ds0[[i]] <- mclust::adjustedRandIndex(temp[, 1], ds0$Classification1)
}
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
# Rebuild the scmap input for ds1 ahead of the repeated self-projection runs.
ref_sce <- as.SingleCellExperiment(ds1)

# logcounts become log2(counts + 1); both assays are stored as dense matrices.
logcounts(ref_sce) <- as.matrix(log2(counts(ref_sce) + 1))
counts(ref_sce) <- as.matrix(counts(ref_sce))

# Gene names go into rowData()$feature_symbol; repeated gene names are dropped
# after their first occurrence.
gene_ids <- rownames(ref_sce)
rowData(ref_sce)$feature_symbol <- gene_ids
ref_sce <- ref_sce[!duplicated(gene_ids), ]

ref_sce <- selectFeatures(ref_sce, suppress_plot = FALSE)
Warning: 'isSpike' is deprecated.
See help("Deprecated")
# Ten seeded repeats of the ds1 self-projection: rebuild the index, project
# ds1 onto it and record the ARI against the annotated labels.
scmapARI_ds1 <- vector("list", 10)
for (i in seq_len(10)) {
  set.seed(17 * i)
  ref_sce <- indexCell(ref_sce)
  scmapCell_results <- scmapCell(
    ref_sce,
    list(ds1 = metadata(ref_sce)$scmap_cell_index)
  )
  scmapCell_clusters <- scmapCell2Cluster(
    scmapCell_results,
    list(as.character(colData(ref_sce)$Classification1))
  )
  temp <- data.frame(
    scmapCell_clusters$scmap_cluster_labs[, "ds1"],
    row.names = colnames(ds1)
  )
  scmapARI_ds1[[i]] <- mclust::adjustedRandIndex(temp[, 1], ds1$Classification1)
}
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
# Rebuild the scmap input for ds2 ahead of the repeated self-projection runs.
ref_sce <- as.SingleCellExperiment(ds2)

# logcounts become log2(counts + 1); both assays are stored as dense matrices.
logcounts(ref_sce) <- as.matrix(log2(counts(ref_sce) + 1))
counts(ref_sce) <- as.matrix(counts(ref_sce))

# Gene names go into rowData()$feature_symbol; repeated gene names are dropped
# after their first occurrence.
gene_ids <- rownames(ref_sce)
rowData(ref_sce)$feature_symbol <- gene_ids
ref_sce <- ref_sce[!duplicated(gene_ids), ]

ref_sce <- selectFeatures(ref_sce, suppress_plot = FALSE)
Warning: 'isSpike' is deprecated.
See help("Deprecated")
# Ten seeded repeats of the ds2 self-projection: rebuild the index, project
# ds2 onto it and record the ARI against the annotated labels.
scmapARI_ds2 <- vector("list", 10)
for (i in seq_len(10)) {
  set.seed(17 * i)
  ref_sce <- indexCell(ref_sce)
  scmapCell_results <- scmapCell(
    ref_sce,
    list(ds2 = metadata(ref_sce)$scmap_cell_index)
  )
  scmapCell_clusters <- scmapCell2Cluster(
    scmapCell_results,
    list(as.character(colData(ref_sce)$Classification1))
  )
  temp <- data.frame(
    scmapCell_clusters$scmap_cluster_labs[, "ds2"],
    row.names = colnames(ds2)
  )
  scmapARI_ds2[[i]] <- mclust::adjustedRandIndex(temp[, 1], ds2$Classification1)
}
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
ARI scmap XGBoost
ds0 0.8925608 0.9316151
ds1 0.7935582 0.8385574
ds2 0.776329 0.9002053
## Identification of SMC2 (SMC2的鉴别)
# ds2 serves as the reference for the ds1 projection: re-read it from disk and
# convert it to a SingleCellExperiment.
ds2 <- readRDS("ds2.rds")
ref_sce <- as.SingleCellExperiment(ds2)

# logcounts become log2(counts + 1); both assays are stored as dense matrices.
logcounts(ref_sce) <- as.matrix(log2(counts(ref_sce) + 1))
counts(ref_sce) <- as.matrix(counts(ref_sce))
# normcounts(ref_sce) <- as.matrix(normcounts(ref_sce))

# Gene names go into rowData()$feature_symbol; repeated gene names are dropped
# after their first occurrence.
gene_ids <- rownames(ref_sce)
rowData(ref_sce)$feature_symbol <- gene_ids
ref_sce <- ref_sce[!duplicated(gene_ids), ]

# Feature selection for scmap, with the diagnostic plot.
ref_sce <- selectFeatures(ref_sce, suppress_plot = FALSE)
Warning: 'isSpike' is deprecated.
See help("Deprecated")
# Build the scmap-cell index for the ds2 reference with scmap's default M and
# k; the ds1 query is searched against it further down.
ref_sce <- indexCell(ref_sce)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
# ds1 is the query: re-read it from disk and convert it to a
# SingleCellExperiment.
ds1 <- readRDS("ds1.rds")
query_sce <- as.SingleCellExperiment(ds1)

# logcounts become log2(counts + 1); both assays are stored as dense matrices.
logcounts(query_sce) <- as.matrix(log2(counts(query_sce) + 1))
counts(query_sce) <- as.matrix(counts(query_sce))
# normcounts(query_sce) <- as.matrix(normcounts(query_sce))

# Gene names go into rowData()$feature_symbol; repeated gene names are dropped
# after their first occurrence.
gene_ids <- rownames(query_sce)
rowData(query_sce)$feature_symbol <- gene_ids
query_sce <- query_sce[!duplicated(gene_ids), ]

# Feature selection for scmap, with the diagnostic plot.
query_sce <- selectFeatures(query_sce, suppress_plot = FALSE)
Warning: 'isSpike' is deprecated.
See help("Deprecated")
# Index the ds1 query with scmap's default M and k. The search below uses the
# reference index from metadata(ref_sce), not this one.
query_sce <- indexCell(query_sce)
Parameter M was not provided, will use M = n_features / 10 (if n_features <= 1000), where n_features is the number of selected features, and M = 100 otherwise.
Parameter k was not provided, will use k = sqrt(number_of_cells)
# Cross-projection: search the ds1 cells (query_sce) against the ds2 index
# (ref_sce) and label them with ds2's Classification1 annotation.
ds2_index <- list(ds2 = metadata(ref_sce)$scmap_cell_index)
scmapCell_results <- scmapCell(query_sce, ds2_index)

ds2_annotation <- list(as.character(colData(ref_sce)$Classification1))
scmapCell_clusters <- scmapCell2Cluster(scmapCell_results, ds2_annotation)

# One-column data frame of predicted labels; row names come from colnames(ds1).
temp <- data.frame(
  scmapCell_clusters$scmap_cluster_labs[, "ds2"],
  row.names = colnames(ds1)
)
Idents(ds1) <- temp

# Save the UMAP coloured by the projected labels.
ggsave(
  "./scmap/scmap_ds1tods2.svg",
  device = svg, width = 6, height = 4,
  plot = umapplot(ds1)
)
Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
Please use `as_label()` or `as_name()` instead.
This warning is displayed once per session.
# Pie chart of the labels projected onto ds1 from the ds2 reference.
#
# Cells per predicted label. The label column is named explicitly so that the
# plotly formulas resolve to columns of this table; table(temp) would name the
# column after temp's auto-generated column name, leaving `~temp` to resolve
# to the whole `temp` data frame instead of the per-label rows.
label_counts <- as.data.frame(table(label = temp[, 1]))
fig <- plot_ly(
  label_counts,
  labels = ~label, values = ~Freq, type = "pie",
  textposition = "inside",
  textinfo = "label+percent+value",
  insidetextfont = list(color = "#000000"),
  hoverinfo = "text",
  text = ~paste0("cell numbers: ", Freq),
  marker = list(
    colors = colors_list,
    line = list(color = "#FFFFFF", width = 0)
  ),
  showlegend = FALSE
) %>%
  layout(
    title = "scmap_ds1tods2",
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    font = list(family = "Arial", size = 25, color = "black")
  )
fig
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.